; Retrieved from: AmigActive 2 / AACD 2.iso / AACD/Magazine/GraphicsCards/StormMesa/src/shade_asmppc.p
; (text file, 1999-02-04, 13KB, 357 lines)
;
; Mesa 3-D graphics library
; Version: 2.5
; Copyright (C) 1995-1997 Brian Paul
;
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Library General Public
; License as published by the Free Software Foundation; either
; version 2 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Library General Public License for more details.
;
; You should have received a copy of the GNU Library General Public
; License along with this library; if not, write to the Free
; Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;
; shade_asmppc.p
; 9.1.1998 by Sam Jordan
;
; PowerPC assembler optimizations of several functions in shade.c
; Originally written for AMIGA OS/PowerOpen. To use this source on other
; PowerPC based platforms or with other programming models, some
; modifications might be needed.
include exec/types.i
; Build-time configuration.
; USE_SUFFIX selects the symbol under which pow is imported and called:
; non-zero -> _pow__r, zero -> _pow.
USE_SUFFIX = 1 ; should be zero for any
; compiler other than StormC
; The lookup index into the shine table is scaled by SHINE_TABLE_SIZE-1
; (see _f_s in the data section).
SHINE_TABLE_SIZE = 200 ; keep consistent with types.h
; Import pow under the name chosen by USE_SUFFIX.
IFNE USE_SUFFIX
XREF _pow__r
ELSEIF
XREF _pow
ENDC
; SHADE_OPT: parameter block passed to _asm_shade_rgba_fast in r8
; ("sh"). The pointer fields are dereferenced once before the loop and
; three floats are read from each of the first four.
STRUCTURE SHADE_OPT,0
; -> h_inf_norm[0..2] of the light
APTR H_INF_NORM
; -> VP_inf_norm[0..2] of the light
APTR VP_INF_NORM
; -> MatDiffuse[side][0..2]
APTR MATDIFFUSE
; -> MatSpecular[side][0..2]
APTR MATSPECULAR
; Material[side].Shininess, passed to pow as the exponent
FLOAT SHININESS
; -> ShineTable (array of floats, indexed by k*4; entries < 0 are
; treated as "not yet computed")
APTR SHINETABLE
LABEL SHADE_OPT_SIZE
; Note: these optimizations assume that at most one light is enabled
; (i.e. the number of lights is less than two).
;
;-----------------------------------------------------------------------
; _asm_shade_rgba_fast
;
; PowerPC version of the fast RGBA vertex shading loop of shade.c.
; For every vertex j in 0..n-1:
;     sum  = baseColor[0..2]
;     sum += n_dot_VP * MatDiffuse      (only if n_dot_VP > 0)
;     sum += spec_coef * MatSpecular    (only if n_dot_h  > 0)
; The sums are clamped to [0,1], scaled by 255, truncated to integers
; and written together with sumA as one packed R,G,B,A word.
;
; In:   r3 = n          number of vertices (0 is allowed: nothing is done)
;       r4 = side       0 = use the normals as given, else negate them
;       r5 = normal     -> 3 floats per vertex
;       r6 = color      -> one 32-bit word per vertex (output)
;       r7 = baseColor  -> 4 floats (R,G,B,A)
;       r8 = sh         -> SHADE_OPT (NULL if no lights are enabled)
; Out:  color[0..n-1] written. ShineTable entries that were < 0 and got
;       looked up are replaced by the computed pow() value.
; Calls: pow (imported as _pow__r or _pow, see USE_SUFFIX)
; Stack: f14-f31 and r23-r31 are stored below the incoming r1, LR is
;       stored at 8(r1) of the caller's frame, then a 320 byte frame
;       is opened. -8(r1) of the new frame is used as scratch for
;       float->int conversion; 40..63(r1) hold f10-f12 across pow.
;
; Register usage inside the loop:
;   r23 = sh                     r25 = ShineTable byte offset (k*4)
;   r26 -> ShineTable            r27 = side
;   r28 = color - 4              r29 = normal - 4  (biased for stwu/lfsu)
;   r30 = vertices remaining     r31 = sumA
;   (r24 is saved and restored but not otherwise used)
;   f10-f12 = baseColor R,G,B    f14-f16 = MatSpecular
;   f17-f19 = MatDiffuse         f20-f22 = h_inf_norm
;   f23-f25 = VP_inf_norm        f26 = 0.0    f27 = 1.0
;   f28 = Shininess              f29-f31 = sumR,sumG,sumB
;-----------------------------------------------------------------------
        XDEF    _asm_shade_rgba_fast
_asm_shade_rgba_fast
; r3 = n
; r4 = side
; r5 = normal
; r6 = color
; r7 = baseColor
; r8 = sh (NULL if no lights are enabled)

; The C loop "for (j=0;j<n;j++)" does nothing for n == 0, but the
; count-down loop below always runs at least once and would wrap
; around on a zero count. Leave here, before any register or stack
; slot has been touched, so no epilogue is needed.
        mr.     r3,r3
        beq     .exit

; --- prologue: save non-volatile FPRs/GPRs and LR, open the frame ---
        stfd    f31,-8*2(r1)
        stfd    f30,-8*3(r1)
        stfd    f29,-8*4(r1)
        stfd    f28,-8*5(r1)
        stfd    f27,-8*6(r1)
        stfd    f26,-8*7(r1)
        stfd    f25,-8*8(r1)
        stfd    f24,-8*9(r1)
        mflr    r0
        stfd    f23,-8*10(r1)
        stfd    f22,-8*11(r1)
        stfd    f21,-8*12(r1)
        stfd    f20,-8*13(r1)
        stfd    f19,-8*14(r1)
        stfd    f18,-8*15(r1)
        stfd    f17,-8*16(r1)
        stfd    f16,-8*17(r1)
        stfd    f15,-8*18(r1)
        stfd    f14,-8*19(r1)
        stw     r0,8(r1)
        stw     r31,-8*19-1*4(r1)
        stw     r30,-8*19-2*4(r1)
        stw     r29,-8*19-3*4(r1)
        stw     r28,-8*19-4*4(r1)
        stw     r27,-8*19-5*4(r1)
        stw     r26,-8*19-6*4(r1)
        stw     r25,-8*19-7*4(r1)
        stw     r24,-8*19-8*4(r1)
        stw     r23,-8*19-9*4(r1)
        stwu    r1,-320(r1)

; --- loop-invariant setup ---
        lfs     f27,_f_1(r2)            ;f27 = 1.0
        fsubs   f26,f27,f27             ;f26 = 0.0

; sumA = (GLint) (baseColor[3] * 255.0F);
; The integer produced by fctiwz is moved to a GPR through the
; scratch doubleword at -8(r1); the result is its low word at -4(r1).
        lfs     f3,_f_255(r2)
        lfs     f1,12(r7)
        fmuls   f1,f1,f3
        fctiwz  f1,f1
        stfd    f1,-8(r1)
        lwz     r31,-4(r1)              ;r31 = sumA

        mr      r30,r3                  ;r30 = loopcounter
        subi    r29,r5,4                ;r29 = normal (biased for lfsu)
        subi    r28,r6,4                ;r28 = color  (biased for stwu)
        mr      r23,r8                  ;r23 = sh
        mr      r27,r4                  ;r27 = side
        lfs     f10,0(r7)               ;f10 = sumR
        lfs     f11,4(r7)               ;f11 = sumG
        lfs     f12,8(r7)               ;f12 = sumB

; Without a SHADE_OPT block there is no light: skip the loads. The
; loop then only copies the clamped baseColor (f14-f25/f28 stay
; unloaded; the dot product computed from them is discarded).
        mr.     r8,r8
        beq     .next
        lwz     r9,VP_INF_NORM(r8)
        lfs     f23,0(r9)               ;f23 = VP_inf_norm[0]
        lfs     f24,4(r9)               ;f24 = VP_inf_norm[1]
        lfs     f25,8(r9)               ;f25 = VP_inf_norm[2]
        lwz     r9,H_INF_NORM(r8)
        lfs     f20,0(r9)               ;f20 = h_inf_norm[0]
        lfs     f21,4(r9)               ;f21 = h_inf_norm[1]
        lfs     f22,8(r9)               ;f22 = h_inf_norm[2]
        lwz     r9,MATDIFFUSE(r8)
        lfs     f17,0(r9)               ;f17 = MatDiffuse[side][0]
        lfs     f18,4(r9)               ;f18 = MatDiffuse[side][1]
        lfs     f19,8(r9)               ;f19 = MatDiffuse[side][2]
        lwz     r9,MATSPECULAR(r8)
        lfs     f14,0(r9)               ;f14 = MatSpecular[side][0]
        lfs     f15,4(r9)               ;f15 = MatSpecular[side][1]
        lfs     f16,8(r9)               ;f16 = MatSpecular[side][2]
        lfs     f28,SHININESS(r8)       ;f28 = Material[side].Shininess
        lwz     r26,SHINETABLE(r8)      ;r26 -> ShineTable

; for (j=0;j<n;j++) {
.next
.loop
; /* the normal vector */
; if (side==0) {
; nx = normal[j][0];
; ny = normal[j][1];
; nz = normal[j][2];
; }
; else {
; nx = -normal[j][0];
; ny = -normal[j][1];
; nz = -normal[j][2];
; }
        mr.     r27,r27
        lfsu    f0,4(r29)               ;f0 = nx
        lfsu    f1,4(r29)               ;f1 = ny
        lfsu    f2,4(r29)               ;f2 = nz
        beq     .zero
        fneg    f0,f0
        fneg    f1,f1
        fneg    f2,f2
.zero
; for (light=ctx->Light.FirstEnabled; light; light=light->NextEnabled) {
; The test of sh is issued first and consumed by "beq .end" below;
; the floating point instructions in between do not change cr0.
        mr.     r23,r23
; sumR = baseColor[0];
; sumG = baseColor[1];
; sumB = baseColor[2];
; n_dot_VP = nx * light->VP_inf_norm[0]
; + ny * light->VP_inf_norm[1]
; + nz * light->VP_inf_norm[2];
        fmuls   f3,f23,f0
        fmr     f29,f10                 ;f29 = sumR
        fmadds  f3,f24,f1,f3
        fmr     f30,f11                 ;f30 = sumG
        fmadds  f3,f25,f2,f3            ;f3 = n_dot_VP
        fmr     f31,f12                 ;f31 = sumB
        beq     .end                    ;sh == NULL: no light contribution
; if (n_dot_VP>0.0F) {
        fcmpu   f3,f26
        ble     .end
; sumR += n_dot_VP * lightMatDiffuse[0];
; sumG += n_dot_VP * lightMatDiffuse[1];
; sumB += n_dot_VP * lightMatDiffuse[2];
; (interleaved with the n_dot_h dot product)
        fmadds  f29,f3,f17,f29
        fmuls   f4,f20,f0
        fmadds  f30,f3,f18,f30
        fmadds  f4,f21,f1,f4
        fmadds  f31,f3,f19,f31
        fmadds  f4,f22,f2,f4            ;f4 = n_dot_h
; n_dot_h = nx * light->h_inf_norm[0]
; + ny * light->h_inf_norm[1]
; + nz * light->h_inf_norm[2];
; if (n_dot_h>0.0F) {
        fcmpu   f4,f26
        ble     .end
; if (n_dot_h>1.0F) {
        fcmpu   f4,f27
        ble     .cont
; GLfloat spec_coef = pow( n_dot_h,
; ctx->Light.Material[side].Shininess );
; if (spec_coef>1.0e-10F) {
; sumR += spec_coef * light->MatSpecular[side][0];
; sumG += spec_coef * light->MatSpecular[side][1];
; sumB += spec_coef * light->MatSpecular[side][2];
; }
; Note: the spec_coef>1.0e-10F test of the C version is not performed
; here; the pow result is always added.
; f10-f12 (baseColor) are kept in the frame across the call.
; NOTE(review): 40(r1)/48(r1) look like they fall inside the outgoing
; parameter area of a PowerOpen frame - confirm that pow never writes
; to its caller's parameter area.
        fmr     f1,f4
        fmr     f2,f28
        stfd    f10,40(r1)
        stfd    f11,48(r1)
        stfd    f12,56(r1)
        IFNE    USE_SUFFIX
        bl      _pow__r
        ELSE
        bl      _pow
        ENDC
        lfd     f12,56(r1)
        lfd     f11,48(r1)
        lfd     f10,40(r1)
        fmadds  f29,f1,f14,f29
        fmadds  f30,f1,f15,f30
        fmadds  f31,f1,f16,f31
        b       .end
.cont
; int k = (int) (n_dot_h * (GLfloat) (SHINE_TABLE_SIZE-1));
; struct gl_material *m = &ctx->Light.Material[side];
; GLfloat spec_coef;
; if (m->ShineTable[k] < 0.0F)
; {
; m->ShineTable[k] = pow( n_dot_h, m->Shininess );
; }
; spec_coef = m->ShineTable[k];
; sumR += spec_coef * light->MatSpecular[side][0];
; sumG += spec_coef * light->MatSpecular[side][1];
; sumB += spec_coef * light->MatSpecular[side][2];
        lfs     f0,_f_s(r2)
        fmuls   f5,f0,f4
        fctiwz  f5,f5
        stfd    f5,-8(r1)
        lwz     r0,-4(r1)               ;r0 = k
        slwi    r25,r0,2                ;r25 = k*4, kept across the pow call
        lfsx    f1,r26,r25              ;f1 = ShineTable[k]
        fcmpu   f1,f26
        bge     .skip                   ;entry already computed
        fmr     f1,f4
        fmr     f2,f28
        stfd    f10,40(r1)
        stfd    f11,48(r1)
        stfd    f12,56(r1)
        IFNE    USE_SUFFIX
        bl      _pow__r
        ELSE
        bl      _pow
        ENDC
        lfd     f12,56(r1)
        lfd     f11,48(r1)
        lfd     f10,40(r1)
        stfsx   f1,r26,r25              ;ShineTable[k] = spec_coef
.skip
        fmadds  f29,f1,f14,f29
        fmadds  f30,f1,f15,f30
        fmadds  f31,f1,f16,f31
.end
; FLOAT_COLOR_TO_UBYTE_COLOR( color[j][0], sumR );
; FLOAT_COLOR_TO_UBYTE_COLOR( color[j][1], sumG );
; FLOAT_COLOR_TO_UBYTE_COLOR( color[j][2], sumB );
; color[j][3] = sumA;
; Branch-free clamp: first select 0.0 for negative sums, then select
; 1.0 where (sum - 1.0) >= 0.
        fsel    f29,f29,f29,f26
        fsel    f30,f30,f30,f26
        fsel    f31,f31,f31,f26
        lfs     f3,_f_255(r2)
        fsubs   f0,f29,f27
        fsubs   f1,f30,f27
        fsubs   f2,f31,f27
; The loop counter is decremented here; cr0 is consumed by
; "bne .loop" below and nothing in between changes it.
        subic.  r30,r30,1
        fsel    f29,f0,f27,f29
        fsel    f30,f1,f27,f30
        fsel    f31,f2,f27,f31
        fmuls   f29,f29,f3
        fmuls   f30,f30,f3
        fmuls   f31,f31,f3
        fctiwz  f29,f29
        fctiwz  f30,f30
        fctiwz  f31,f31
; Pack the four components into one word: R in the top byte, then
; G, B, and sumA in the low byte.
        stfd    f29,-8(r1)
        lwz     r3,-4(r1)
        slwi    r3,r3,24
        stfd    f30,-8(r1)
        lwz     r0,-4(r1)
        rlwimi  r3,r0,16,8,15
        stfd    f31,-8(r1)
        lwz     r0,-4(r1)
        rlwimi  r3,r0,8,16,23
        rlwimi  r3,r31,0,24,31
        stwu    r3,4(r28)               ;color[j] = packed word
        bne     .loop

; --- epilogue: close the frame, restore registers and LR ---
        addi    r1,r1,320
        lwz     r23,-8*19-9*4(r1)
        lwz     r24,-8*19-8*4(r1)
        lwz     r25,-8*19-7*4(r1)
        lwz     r26,-8*19-6*4(r1)
        lwz     r27,-8*19-5*4(r1)
        lwz     r28,-8*19-4*4(r1)
        lwz     r29,-8*19-3*4(r1)
        lwz     r30,-8*19-2*4(r1)
        lwz     r31,-8*19-1*4(r1)
        lwz     r0,8(r1)
        lfd     f14,-8*19(r1)
        lfd     f15,-8*18(r1)
        lfd     f16,-8*17(r1)
        lfd     f17,-8*16(r1)
        lfd     f18,-8*15(r1)
        lfd     f19,-8*14(r1)
        lfd     f20,-8*13(r1)
        lfd     f21,-8*12(r1)
        mtlr    r0
        lfd     f22,-8*11(r1)
        lfd     f23,-8*10(r1)
        lfd     f24,-8*9(r1)
        lfd     f25,-8*8(r1)
        lfd     f26,-8*7(r1)
        lfd     f27,-8*6(r1)
        lfd     f28,-8*5(r1)
        lfd     f29,-8*4(r1)
        lfd     f30,-8*3(r1)
        lfd     f31,-8*2(r1)
.exit
        blr
; Single-precision constants. _asm_shade_rgba_fast loads them with
; r2-relative addressing (lfs fX,label(r2)).
section data
; 1.0: upper clamp bound; 0.0 is derived from it by subtraction
_f_1 dc.s 1.0
; scale factor from [0,1] float colour to 0..255
_f_255 dc.s 255.0
; scale factor that turns n_dot_h into a ShineTable index
_f_s dc.s SHINE_TABLE_SIZE-1
; not referenced by the code visible in this section of the file
_f_huge dc.s 255.0*128.0*65536.0